*******************************************
*******************************************
*	CREATING TRANSACTION AND 	  		  *
*			ASSESSMENT  DATASETS		  *
*******************************************
*******************************************

clear
set more off
*cd "C:\Users\mrh105\Box\EDWIN_DEP_Well_Data\Washington_County_Project\Forgone Value Calculations and Shifting Test" /*PC*/
cd "C:\Users\maxha\Box\EDWIN_DEP_Well_Data\Washington_County_Project\Final Code\Forgone Value Calculations and Shifting Test" /*Home PC*/

*****************************************************
***STEP 1: CREATING THE RELEVANT TAX ROLL DATASET***
*****************************************************

****1a: Grab Vacant Taxable Parcels from Tax Roll Main Buildings

	*use "C:\Users\mrh105\Box\EDWIN_DEP_Well_Data\Washington_County_Project\Building Data\WC_TaxRoll_2017.dta"
	use "C:\Users\maxha\Box\EDWIN_DEP_Well_Data\Washington_County_Project\Final Code\Building Data\WC_TaxRoll_2017.dta"
		gen type= "main"
		sort PARID CARD
		duplicates drop PARID CARD CLASS SFLA YRBLT ADRNO STORIES BLDVAL, force
		keep if SFLA==. & BLDVAL==0 
		gen PIN1= substr(PARID, 1,3)
		gen PIN2= substr(PARID, 4,3)
		gen PIN3= substr(PARID, 7,2)
		gen PIN4= substr(PARID, 9,2)
		gen PIN5= substr(PARID, 11,4)
		gen PIN6= substr(PARID, 15,.)
		gen PIN= PIN1+"-"+PIN2+"-"+PIN3+"-"+PIN4+"-"+PIN5+"-"+PIN6
	save vacant_taxable_parcels.dta, replace

****1b: Merge Building Observations and their Square Meter Footprints with Vacant Parcels
	clear
	use WC_TaxRoll_GIS_ForgoneValue.dta
		drop _merge
		merge 1:1 bldg_id using "C:\Users\maxha\Box\EDWIN_DEP_Well_Data\Washington_County_Project\Final Code\Compiling Near Table\Building_Area_SqM.dta"
		*merge 1:1 bldg_id using "C:\Users\mrh105\Box\EDWIN_DEP_Well_Data\Washington_County_Project\Compiling Near Table\Building_Area_SqM.dta"
		drop _merge
		append using vacant_taxable_parcels.dta
		drop if LUC=="411:Coal - active" | LUC=="412:Coal - reserve" | LUC=="413:mined out coal" | LUC=="425:Natural Gas Related" | LUC=="900:U.S. Government" | LUC=="901:State Of Pennsylvania" | LUC=="902:County Facility" | | LUC=="903:Municipality" | LUC=="904:Colleges" | LUC=="905:Charitable Organizations" | LUC=="906:Houses of Worship" |LUC=="C-D:Certified Deed not taxable" | LUC=="C-P:Certified Plan not taxable" | LUC=="350:Property Used For Postal Services" | LUC=="RV:repository value"| LUC=="VET:veterans exemption"| LUC=="955:Charitable - Hospitals"| LUC=="954:Function Halls, Community Centers"

****1c: Clean and create the relevant variables

		keep	PIN PIN1 LUC type ACRES   ///
				FootPrint_Area_SqM SFLA  ///
				UTIL1 BSMT ATTIC GRADE EXTWALL RMTOT RMBED FIXBATH FIXHALF STORIES HEAT /// 
				LANDVAL BLDVAL TOTALVAL  ///
				YRBLT  ///
				OWN1 OWN2 ADDR1 ADDR2 ADDR3 CITYNAME STATECODE ZIP1 ///
				SCHDIST
				
		foreach x in ACRES SFLA RMTOT RMBED FIXBATH FIXHALF LANDVAL BLDVAL TOTALVAL FootPrint_Area_SqM STORIES{
			replace `x'=0 if `x'==.
		}
				
		gen LUC_NUM= substr(LUC, 1,3)
		gen LUC_NUM2= real(LUC_NUM)
		
		replace type="commercial" if LUC_NUM2==031
		replace type="commercial" if LUC_NUM2>=300 & LUC_NUM2!=.
		
		drop LUC_NUM LUC_NUM2
		
		gen public_h2o = (UTIL1=="2:PUBLIC WATER" | UTIL1=="1:ALL PUBLIC")
		gen public_sewer = (UTIL1=="3:PUBLIC SEWER" | UTIL1=="1:ALL PUBLIC")
		gen public_gas = (UTIL1=="4:GAS" | UTIL1=="1:ALL PUBLIC")
		drop UTIL1 LUC
		
		gen attic_unfinished = (ATTIC=="2:UNFINISHED")
		gen attic_partial = (ATTIC=="3:PART FINISH")
		gen attic_finished = (ATTIC=="4:FULL FINISH" | ATTIC=="5:FULL FINISH")
		drop ATTIC
		
		gen BSMT_crawl = (BSMT=="2:CRAWL")
		gen BSMT_partial = (BSMT=="3:PARTIAL")
		gen BSMT_full = (BSMT=="4:FULL")
		drop BSMT
		
		replace EXTWALL="00:NO STRUCTURE" if EXTWALL==""
		xi i.EXTWALL
		drop EXTWALL
		
		rename _IEXTWALL_2 ext_wall_frame
		rename _IEXTWALL_3 ext_wall_brick
		rename _IEXTWALL_4 ext_wall_masonry_frame
		rename _IEXTWALL_5 ext_wall_block
		rename _IEXTWALL_6 ext_wall_stucco
		rename _IEXTWALL_7 ext_wall_vinyl
		rename _IEXTWALL_8 ext_wall_stone
		rename _IEXTWALL_9 ext_wall_composit
		rename _IEXTWALL_10 ext_wall_concrete
		rename _IEXTWALL_11 ext_wall_asbestos
				/*NO STRUCTURE IS OMITTED CATEGORY*/
		
		gen HEAT_noncentral = (HEAT=="1:NONE"|HEAT=="2:NON-CENTRAL")
		gen HEAT_central= (HEAT=="3:CENTRAL"|HEAT=="4:CENTRAL A/C")
		gen central_ac= (HEAT=="4:CENTRAL A/C")
		drop HEAT
	
		*Rolling Up By Parcel
		
		replace SFLA=0 if type!="main"
		gen School_District= substr(SCHDIST, 2,3)
		gen PIN2=real(PIN1)
		gen School_District1=real(School_District)
		drop SCHDIST PIN1 School_District
		
		gen buildings_num=1 if FootPrint_Area_SqM!=0
		
		foreach x in ACRES FootPrint_Area_SqM RMTOT RMBED FIXBATH FIXHALF STORIES buildings_num {
			gen commercial_`x'=`x' if type=="commercial"
			gen main_`x'=`x' if type=="main"
			gen out_`x'=`x' if type=="out"
			drop `x'
		}
				
		foreach x in YRBLT {
			gen commercial`x'=`x' if type=="commercial"
			gen main`x'=`x' if type=="main"
			gen out`x'=`x' if type=="out"
			drop `x'
		}
		
		bysort PIN: egen sum_out=total(out_FootPrint_Area_SqM)
		gen weight_out_YRBLT= outYRBLT*(out_FootPrint_Area_SqM/sum_out)
		
		replace type="amain" if type=="main"
		gsort PIN type -mainYRBLT -commercialYRBLT -outYRBLT
		
		collapse (sum) weight_out_YRBLT SFLA commercial_ACRES main_ACRES commercial_FootPrint_Area_SqM main_FootPrint_Area_SqM  out_FootPrint_Area_SqM ///
		commercial_RMTOT main_RMTOT  commercial_RMBED main_RMBED  commercial_FIXBATH main_FIXBATH  commercial_FIXHALF main_FIXHALF    ///
		commercial_buildings_num main_buildings_num  out_buildings_num main_STORIES commercial_STORIES ///
		(max) TOTALVAL PIN2 School_District1 ///
		commercialYRBLT mainYRBLT  outYRBLT (first) central_ac public_h2o public_sewer public_gas attic_unfinished attic_partial attic_finished BSMT_crawl ///
		BSMT_partial BSMT_full ext_wall_frame ext_wall_brick ext_wall_masonry_frame ext_wall_block ext_wall_stucco ext_wall_vinyl ext_wall_stone ext_wall_concrete ext_wall_asbestos ///
		ext_wall_composit HEAT_noncentral HEAT_central  ///
		(first) OWN1 OWN2 ADDR1 ADDR2 ADDR3 CITYNAME STATECODE ZIP1 ///
		, by (PIN)

		replace School_District1=0 if School_District1==.
		
		replace PIN2=0 if PIN2==.
		tostring PIN2, gen(PIN3)
		gen PIN4= substr(PIN3, -1,.)
		replace	PIN2=PIN2-1 if PIN4=="1"
		replace	PIN2=PIN2-2 if PIN4=="2"
		replace	PIN2=PIN2-3 if PIN4=="3"
		replace	PIN2=PIN2-4 if PIN4=="4"
		replace	PIN2=PIN2-5 if PIN4=="5"
		replace	PIN2=PIN2-6 if PIN4=="6"
		drop PIN3 PIN4
		
		drop if main_ACRES==0 & commercial_ACRES==0 /*DROPPING PARCELS WITHOUT ACREAGE*/
		
		merge m:1 PIN using Muni_Names_PIN.dta
		drop if _merge==2
		drop if _merge==1 /*Spot check suggest most of these are exempt properties, or were removed from tax roll, and thus from the GIS data*/
		drop _merge
		encode MCN, gen(MUNI)
		
merge 1:1 PIN using Parcel_CountWells.dta
	drop if _merge==2
	drop _merge
	
save Forgone_Value_Estimation_For_Transactions.dta, replace 
		
		gen commercial_parcel= (commercial_ACRES>0)
		gen total_FootPrint_Area_SqM= main_FootPrint_Area_SqM+out_FootPrint_Area_SqM+commercial_FootPrint_Area_SqM
		gen totalacres= main_ACRES+commercial_ACRES
		
save Forgone_Value_Estimation.dta, replace 

**********************************************
***STEP 2: CREATING THE TRANSACTION DATASET***
***********************************************
	import excel "C:\Users\maxha\Box\EDWIN_DEP_Well_Data\Washington_County_Project\Final Code\Forgone Value Calculations and Shifting Test\WC_Transactions_1970_2012.xlsx", sheet("Sheet1") firstrow clear
		keep Sale_date Sale_price Parcel_no
		gen sale_year= year(Sale_date)
		gen sale_quarter= quarter(Sale_date)
		gen sale_month= month(Sale_date)
		gen sale_yearquarter=(sale_year*10)+sale_quarter
		rename Parcel_no PIN
		gsort PIN -Sale_date -Sale_price
		duplicates drop PIN, force
		gen source= "WC"
		
		recast float Sale_date
		
		preserve
		clear
		import delimited "C:\Users\maxha\Box\EDWIN_DEP_Well_Data\Washington_County_Project\Final Code\Forgone Value Calculations and Shifting Test\Zillow_Transactions_WC.txt"
		keep transid sale_price sale_year censustractandblock sale_month sale_quarter sale_yearquarter tract sale_date pin
		rename pin PIN
		merge 1:1 transid using ZTRAX_lotsize.dta
		drop _merge
		gen Sale_date= substr(sale_date,1,10) 
		replace Sale_date= subinstr(Sale_date," 0","",.)
		gen date2 = date(Sale_date, "MD19Y")
		form date2 %tdnn/dd/CCYY
		drop Sale_date
		rename date2 Sale_date 
		recast float Sale_date
		rename sale_price Sale_price
		save Zillow_Transactions_WC_clean.dta, replace
		restore
		
		append using Zillow_Transactions_WC_clean.dta
		replace source="Zillow" if source==""
		
		drop if PIN==""
	
save WC_Transactions_1970_2017.dta,replace

clear
use Forgone_Value_Estimation_For_Transactions.dta

	merge 1:m PIN using WC_Transactions_1970_2017.dta
	keep if _merge==3
	
	drop if Sale_price<=1
	duplicates tag Sale_date Sale_price source MUNI, gen(dup)

	gen year= sale_year
	gen cpi=1 /*CONVERTS TO REAL DOLLARS*/
	replace cpi= 0.289196676 if year== 1978 /*Updated CPI-U-RS, All items, 1977-2018  FROM: https://www.bls.gov/cpi/research-series/home.htm#CPI-U-RS%20Data */ /*ALSO DL'd in my data folder as Updated CPI-U-RS, All items, 1977-2018*/
	replace cpi= 0.316620499 if year== 1979
	replace cpi= 0.352077562 if year== 1980
	replace cpi= 0.38531856 if year== 1981
	replace cpi= 0.408587258 if year== 1982
	replace cpi= 0.426038781 if year== 1983
	replace cpi= 0.443767313 if year== 1984
	replace cpi= 0.45900277 if year== 1985
	replace cpi= 0.467036011 if year== 1986
	replace cpi= 0.483102493 if year== 1987
	replace cpi= 0.500554017 if year== 1988
	replace cpi= 0.522437673 if year== 1989
	replace cpi= 0.548199446 if year== 1990
	replace cpi= 0.568144044 if year== 1991
	replace cpi= 0.582271468 if year== 1992
	replace cpi= 0.596952909 if year== 1993
	replace cpi= 0.609418283 if year== 1994
	replace cpi= 0.624099723 if year== 1995
	replace cpi= 0.640720222 if year== 1996
	replace cpi= 0.654570637 if year== 1997
	replace cpi= 0.663434903 if year== 1998
	replace cpi= 0.677562327 if year== 1999
	replace cpi= 0.700554017 if year== 2000
	replace cpi= 0.720498615 if year== 2001
	replace cpi= 0.731855956 if year== 2002
	replace cpi= 0.748476454 if year== 2003
	replace cpi= 0.768698061 if year== 2004
	replace cpi= 0.794736842 if year== 2005
	replace cpi= 0.820498615 if year== 2006
	replace cpi= 0.843767313 if year== 2007
	replace cpi= 0.876177285 if year== 2008
	replace cpi= 0.873130194 if year== 2009
	replace cpi= 0.887534626 if year== 2010
	replace cpi= 0.915512465 if year== 2011
	replace cpi= 0.934903047 if year== 2012
	replace cpi= 0.948753462 if year== 2013
	replace cpi= 0.964819944 if year== 2014
	replace cpi= 0.966481994 if year== 2015
	replace cpi= 0.978947368 if year== 2016
	replace cpi= 1 if year==2017
	replace cpi= 1.024376731 if year== 2018
	replace cpi= 1.042936288 if year== 2019
	
	replace Sale_price=Sale_price/cpi
	
	/*DEALING WITH MULTI PARCEL TRANSACTIONS*/

		preserve
			keep if dup>0
					drop weight_out_YRBLT
					bysort Sale_date Sale_price source MUNI: egen sum_out=total(out_FootPrint_Area_SqM)
					gen weight_out_YRBLT= outYRBLT*(out_FootPrint_Area_SqM/sum_out)	
					
			collapse (sum) weight_out_YRBLT SFLA commercial_ACRES main_ACRES lotsizeacres_ztrax commercial_FootPrint_Area_SqM main_FootPrint_Area_SqM out_FootPrint_Area_SqM ///
			  commercial_RMTOT main_RMTOT commercial_RMBED main_RMBED commercial_FIXBATH main_FIXBATH commercial_FIXHALF main_FIXHALF ///
			   commercial_buildings_num main_buildings_num out_buildings_num main_STORIES commercial_STORIES TOTALVAL ///
			  (first) PIN PIN2 sale_year sale_quarter sale_month sale_yearquarter MCN PINnoDash School_District1 OWN1 OWN2 ADDR1 ADDR2 ADDR3 CITYNAME STATECODE ZIP1 ///
			  (max) ext_wall_frame ext_wall_brick ext_wall_masonry_frame ext_wall_block ext_wall_stucco ext_wall_vinyl ext_wall_stone ext_wall_concrete ext_wall_asbestos ext_wall_composit commercialYRBLT mainYRBLT outYRBLT central_ac public_h2o public_sewer public_gas attic_unfinished attic_partial attic_finished BSMT_crawl BSMT_partial BSMT_full HEAT_noncentral HEAT_central ///
			  , by(Sale_date Sale_price source MUNI)

			gen total_FootPrint_Area_SqM= main_FootPrint_Area_SqM+out_FootPrint_Area_SqM+commercial_FootPrint_Area_SqM	
			gen totalacres= main_ACRES+commercial_ACRES
			gen commercial_parcel= (commercial_ACRES>0)
			
			save multi_trans_merge_back.dta, replace
		restore	  
		
		drop if dup>0
			gen total_FootPrint_Area_SqM= main_FootPrint_Area_SqM+out_FootPrint_Area_SqM+commercial_FootPrint_Area_SqM	
			gen totalacres= main_ACRES+commercial_ACRES
			gen commercial_parcel= (commercial_ACRES>0)
		append using multi_trans_merge_back.dta 
		drop _merge dup
		erase multi_trans_merge_back.dta
		
		*DEALING WITH POOR MATCHES OF ZILLOW AND PIN*
		gen acre_diff= totalacres-lotsizeacres_ztrax
		gen percent_acre_diff= abs(acre_diff/totalacres)
		replace percent_acre_diff=0 if percent_acre_diff==.
		drop if percent_acre_diff>.05
		
		replace weight_out_YRBLT= round(weight_out_YRBLT,1)
		
	save Forgone_Value_Estimation_Transactions.dta, replace


	erase vacant_taxable_parcels.dta
	erase WC_Transactions_1970_2017.dta
	erase Forgone_Value_Estimation_For_Transactions.dta
	*erase Forgone_Value_Estimation_Transactions.dta 